import os
import codecs


def remove_same_line(source_file_path, target_file_path):
    """Copy a UTF-8 text file, skipping lines that look like near-repeats.

    Each line is stripped of surrounding whitespace. The "key" of the most
    recently kept line is that line with its last 30 characters removed.
    A following line is skipped when it contains that key anywhere in it;
    otherwise it is kept and becomes the new reference line.

    Lines of 30 characters or fewer yield an empty key, and an empty key
    never causes a skip, so the line after a short line is always kept.

    Args:
        source_file_path: Path of the UTF-8 encoded file to read.
        target_file_path: Path of the UTF-8 encoded file to write. The kept
            lines are joined with "\\n"; no trailing newline is added.

    Returns:
        None. The result is written to ``target_file_path``.
    """
    # Number of trailing characters ignored when building the comparison key.
    ignored_tail_length = 30

    result = []
    tmp_word = ""
    # The context manager closes the input handle even if decoding fails
    # part-way through the file.
    with codecs.open(source_file_path, 'r', "utf-8") as raw_addr_word:
        for line in raw_addr_word:
            word = line.strip()
            # An empty key would match every line, so it is never used to skip.
            if tmp_word and tmp_word in word:
                continue
            result.append(word)
            tmp_word = word[:-ignored_tail_length]

    content = "\n".join(result)
    # The output is opened only after reading has finished, and is closed
    # even if the write raises.
    with codecs.open(target_file_path, 'w', "utf-8") as result_file:
        result_file.write(content)

# Runs on import/execution: writes the filtered copy of the training word
# list next to the original file.
remove_same_line(
    source_file_path="data/addr_word_train.txt",
    target_file_path="data/addr_word_train_lite.txt",
)